#!/usr/bin/python
# -*- coding: utf-8 -*-

#1. Read URLs list from file;
#2. Download each URL by chunk;
#3. Save each file into specified directory;

import os
import sys
import urllib
import urllib.error
import urllib.request

'''
Author: Bouygues
Date: 4th August, 2013
Version: 1.2

Update Logs:
version 1.0: created
version 1.1: add try ... catch for HttpError handling
version 1.2: can save file to disk by chunk
'''

# --- class ---
class MultiDownload():
    '''
    Download each URL in a list and save it into a target directory.

    Each file is named after the last path component of its URL.
    URLs whose extension is not in self.pfix can be filtered out
    (the filter is currently disabled in run()).
    '''
    def __init__(self, urls, fold):
        """
        urls: iterable of URL strings to download
        fold: target directory path; normalized so it ends with '/'
        """
        self.urls = urls
        self.fold = self.pathFormat(fold)
        # chunk size in KiB (multiplied by 1024 in down())
        self.chunk = 512
        # accepted extensions for the (optional) filter in run()
        self.pfix = ("jpg", "JPG", "JPEG", "jpeg", "png", "PNG")

    def pathFormat(self, path):
        """
        Normalize a directory path: strip whitespace, convert
        backslashes to forward slashes, and ensure it ends with '/'.
        """
        path = path.strip().replace("\\", "/")
        # endswith() also handles an empty path safely
        # (the original indexed path[size-1] and raised IndexError on "")
        if not path.endswith("/"):
            path += "/"
        return path

    def getPostfix(self, url):
        """Return the extension after the last '.', or None if absent."""
        if "." not in url:
            return None
        return url[url.rfind(".") + 1:]

    def getFileName(self, url):
        """Return the component after the last '/', or None if absent."""
        if "/" not in url:
            return None
        return url[url.rfind("/") + 1:]

    def down(self, url):
        """
        Download one URL into self.fold, streaming in self.chunk-KiB
        pieces so large files are never held fully in memory.

        On failure prints an error and leaves no partial/empty file:
        the output file is only created after the connection opens
        (the original opened the file first, so a bad URL still left
        an empty file behind).
        """
        filename = self.getFileName(url)
        path = self.fold + filename
        try:
            w = urllib.request.urlopen(url)
        except (urllib.error.URLError, OSError, ValueError):
            # narrow catch: network/HTTP failures and malformed URLs only
            # (the original bare `except:` also hid programming errors)
            print("[ERROR]  %s" % (url))
            return
        try:
            with open(path, 'wb') as f:
                while True:
                    chunkD = w.read(self.chunk * 1024)
                    if len(chunkD) == 0:
                        break
                    f.write(chunkD)
                    print(" - Download %dK data" % (len(chunkD) / 1024))
        finally:
            w.close()

    def run(self):
        """Download every URL in self.urls; return True when finished."""
        for line in self.urls:
            # extension filter intentionally disabled:
            # if self.getPostfix(line) not in self.pfix:
            #     continue
            self.down(line)
            print(line)
        return True
            

# --- global functions ---

    

# --- main ---
'''
download files listed in file
'''
# variables: ask the user for the URL-list file and the output directory
inputfile = input("input filename: ")
targetList = set()
basedir = input("where do you want to save files? ")


# get url list: one URL per line; a set de-duplicates repeated URLs,
# and len > 3 skips blank/too-short lines
with open(inputfile, 'r') as f:
    for line in f:
        line = line.strip()
        if len(line) > 3:
            targetList.add(line)

# download each file
md = MultiDownload(targetList, basedir)
md.run()
